home *** CD-ROM | disk | FTP | other *** search
- {*********************************************************}
- {* AASorter *}
- {* Copyright (c) Julian M Bucknall 2000 *}
- {* All rights reserved. *}
- {*********************************************************}
- {* Algorithms Alfresco: Sorter class *}
- {*********************************************************}
-
- {Note: this unit is released as freeware. In other words, you are free
- to use this unit in your own applications, however I retain all
- copyright to the code. JMB}
-
- unit AASorter;
-
- interface
-
- uses
- Windows,
- SysUtils,
- Classes;
-
- type
- {function prototype to compare two items;
- returns integer <0 if Item1<Item2, =0 if equal, >0 otherwise}
- TaaMergeCompare = function (const aItem1, aItem2 : pointer) : integer;
-
- type
- TaaSorter = class
- private
- FBuffer : PChar;
- FCompare : TaaMergeCompare;
- FCurRec : integer;
- FDestFile : TObject;
- FF1 : TObject;
- FF2 : TObject;
- FG1 : TObject;
- FG2 : TObject;
- FMaxRecCount : integer;
- FRecCount : integer;
- FRecLen : integer;
- FSrcFile : TObject;
- FState : integer;
- protected
- procedure srSetCompare(aValue : TaaMergeCompare);
- procedure srMaxRecCount(aValue : integer);
- procedure srSetRecLen(aValue : integer);
-
- procedure srCreateMergeFiles;
- procedure srGetBuffer;
- procedure srMergeFiles;
- procedure srQuickSortBuffer;
- public
- constructor Create;
- destructor Destroy; override;
-
- procedure Add(var aRecord);
- {add a new record to the sorter}
- function Get(var aRecord) : boolean;
- {get a record from the sorter in sequence; returns true if a
- record was retrieved, false if all records have been
- obtained}
- procedure Reset;
- {reset the sorter to return to the "adding records" stage; all
- records in the sorter are discarded}
-
- property Compare : TaaMergeCompare
- read FCompare write srSetCompare;
- {the comparison function}
- property MaxRecordCount : integer
- read FMaxRecCount write srMaxRecCount;
- {maximum number of records to be held by the sorter before
- writing them to disk and performing file merges}
- property RecordLength : integer
- read FRecLen write srSetRecLen;
- {length of the records begin added}
- end;
-
- implementation
-
- const {internal states for the sorter}
- WaitingState = 0; {empty, waiting for the first record}
- AddingState = 1; {adding records, no flush to disk yet}
- AddWithMergeState = 2; {adding records, will require file merge}
- GettingState = 3; {getting records}
- GetWithMergeState = 4; {getting records from merged file}
- FinishedState = 5; {all records have been retrieved}
-
- {===Quicksort========================================================}
- procedure QuickSort(aBuffer : PChar;
- aRecLen : integer;
- aCount : integer;
- aCompare : TaaMergeCompare);
- var
- Temp : pointer;
- {------}
- function Partition(L, R : integer): integer;
- var
- Left : PChar;
- Right: PChar;
- Last : PChar;
- First: PChar;
- begin
- {set up the indexes}
- Left := aBuffer + (L * aRecLen);
- First := Left;
- Right := aBuffer + (pred(R) * aRecLen);
- {get the partition element}
- Last := Right + aRecLen;
- {do forever (we'll break out of the loop when needed)}
- while true do begin
- {find the first element greater than or equal to the partition
- element from the left; note that our partition element will
- stop this loop}
- while (aCompare(Left, Last) < 0) do
- inc(Left, aRecLen);
- {find the first element less than the partition element from the
- right; check to break out of the loop if we hit the left
- element - we have no sentinel there}
- while (aCompare(Last, Right) < 0) do begin
- if (Right = First) then
- Break;
- dec(Right, aRecLen);
- end;
- {if we crossed get out of this infinite loop to swap the
- partition element into place}
- if (Left >= Right) then
- Break;
- {otherwise swap the two out-of-place elements}
- Move(Left^, Temp^, aRecLen);
- Move(Right^, Left^, aRecLen);
- Move(Temp^, Right^, aRecLen);
- {and continue}
- inc(Left, aRecLen);
- dec(Right, aRecLen);
- end;
- {swap the partition element into place, return the dividing index}
- Move(Left^, Temp^, aRecLen);
- Move(Last^, Left^, aRecLen);
- Move(Temp^, Last^, aRecLen);
- Result := (Left - aBuffer) div aRecLen;
- end;
- {------}
- procedure QuickSortPrim(L, R : integer);
- var
- DividingItem : integer;
- begin
- {stop the recursion, if needed}
- if (R <= L) then
- Exit;
- {otherwise, partition about the final element in the set}
- DividingItem := Partition(L, R);
- {recursively quicksort the two subsets either side of the dividing
- element}
- QuicksortPrim(L, pred(DividingItem));
- QuicksortPrim(succ(DividingItem), R);
- end;
- {------}
- begin
- GetMem(Temp, aRecLen);
- try
- QuickSortPrim(0, pred(aCount));
- finally
- FreeMem(Temp);
- end;
- end;
- {====================================================================}
-
-
- {===Mergesort=================================================================}
- function ReadRecFixed(aStream : TStream;
- var aBuffer;
- aRecLen : integer) : boolean;
- var
- BytesRead : longint;
- begin
- BytesRead := aStream.Read(aBuffer, aRecLen);
- Result := BytesRead = aRecLen;
- end;
- {--------}
- function MergeRunsFixed(aF1 : TStream;
- aF2 : TStream;
- aG1 : TStream;
- aG2 : TStream;
- aRecLen : integer;
- aRunLen : integer;
- aCompare: TaaMergeCompare) : boolean;
- const
- FirstFile = false;
- SecondFile = true;
- type
- {a record that describes the processing of a single input file}
- TInputFile = packed record
- ifStrm : TStream; {stream}
- ifRec : pointer; {record buffer}
- ifRecsInRun : integer; {records to go in run}
- ifEOF : boolean; {stream is exhausted}
- end;
- var
- F : array[boolean] of TInputFile;
- G : array [boolean] of TStream;
- SrcFile : boolean;
- DestFile : boolean;
- FileId : boolean;
- begin
- {assume that this merge pass will finish completely}
- Result := true;
- {initialize the input file records}
- with F[FirstFile] do begin
- ifStrm := aF1;
- ifRec := nil;
- ifRecsInRun := 0;
- ifEOF := false;
- end;
- with F[SecondFile] do begin
- ifStrm := aF2;
- ifRec := nil;
- ifRecsInRun := 0;
- ifEOF := false;
- end;
- {set up the output files}
- G[FirstFile] := aG1;
- G[SecondFile] := aG2;
- try
- {clear the output streams}
- {NOTE: this only works for Delphi 3 and above, since only
- their TStreams have a SetSize accessor method}
- G[FirstFile].Size := 0;
- G[SecondFile].Size := 0;
- {reset the input streams, allocate the record buffers,
- and set the EOF flags}
- for FileId := FirstFile to SecondFile do
- with F[FileId] do begin
- ifStrm.Seek(0, soFromBeginning);
- GetMem(ifRec, aRecLen);
- ifEOF := ifStrm.Size = 0;
- end;
- {make sure the first output goes to G1}
- DestFile := FirstFile;
- {cycle until we manage to exhaust both input files}
- while (not F[FirstFile].ifEOF) or
- (not F[SecondFile].ifEOF) do begin
- {if we start writing to the second file, we won't finish
- the merge process this time}
- if (DestFile = SecondFile) then
- Result := false;
- {initialize ready for merging next runs}
- F[FirstFile].ifRecsInRun := aRunLen;
- F[SecondFile].ifRecsInRun := aRunLen;
- {read the first two records in the respective runs}
- with F[FirstFile] do
- if ReadRecFixed(ifStrm, ifRec^, aRecLen) then
- dec(ifRecsInRun)
- else begin
- ifRecsInRun := -1;
- ifEOF := true;
- end;
- with F[SecondFile] do
- if ReadRecFixed(ifStrm, ifRec^, aRecLen) then
- dec(ifRecsInRun)
- else begin
- ifRecsInRun := -1;
- ifEOF := true;
- end;
- {merge the two runs--one from F1 and the other from F2}
- while ((F[FirstFile].ifRecsInRun >= 0) or
- (F[SecondFile].ifRecsInRun >= 0)) do begin
- {find the smaller record of the two current ones}
- {if the run from F1 is exhausted then the record from
- F2 is the 'smaller'}
- if (F[FirstFile].ifRecsInRun < 0) then
- SrcFile := SecondFile
- {if the run from F2 is exhausted then the record from
- F1 is the 'smaller'}
- else if (F[SecondFile].ifRecsInRun < 0) then
- SrcFile := FirstFile
- {otherwise we need to actually compare the records to
- find the smaller}
- else
- SrcFile :=
- aCompare(F[FirstFile].ifRec, F[SecondFile].ifRec) > 0;
- {write the smaller record to the current output file}
- G[DestFile].WriteBuffer(F[SrcFile].ifRec^, aRecLen);
- {read the next record from the file whose record we just used}
- with F[SrcFile] do
- if (ifRecsInRun <= 0) then
- ifRecsInRun := -1
- else if ReadRecFixed(ifStrm, ifRec^, aRecLen) then
- dec(ifRecsInRun)
- else begin
- ifRecsInRun := -1;
- ifEOF := true;
- end
- end;
- {having merged two runs, switch output files}
- DestFile := not DestFile;
- end;
- finally
- if (F[SecondFile].ifRec <> nil) then
- FreeMem(F[SecondFile].ifRec);
- if (F[FirstFile].ifRec <> nil) then
- FreeMem(F[FirstFile].ifRec);
- end;
- end;
- {--------}
- function MergesortFixed(aF1 : TFileStream;
- aF2 : TFileStream;
- aG1 : TFileStream;
- aG2 : TFileStream;
- aRecLen : integer;
- aRunLen : integer;
- aCompare : TaaMergeCompare) : boolean;
- var
- Merged : boolean;
- FIsSrc : boolean;
- begin
- {perform the first merge pass}
- Merged := MergeRunsFixed(aF1, aF2, aG1, aG2,
- aRecLen, aRunLen, aCompare);
- {now we continually merge the runs until we end up with a single
- file containing all the records}
- FIsSrc := true;
- while not Merged do begin
- aRunLen := aRunLen * 2;
- FIsSrc := not FIsSrc;
- if FIsSrc then
- Merged := MergeRunsFixed(aF1, aF2, aG1, aG2,
- aRecLen, aRunLen, aCompare)
- else
- Merged := MergeRunsFixed(aG1, aG2, aF1, aF2,
- aRecLen, aRunLen, aCompare);
- end;
- {we've now merged all the records into either F1 or G1; return true
- if it's F1, false if it's G1}
- Result := not FIsSrc;
- end;
- {====================================================================}
-
-
- {===TaaTempFileStream================================================}
- type
- TaaTempFileStream = class(TFileStream)
- private
- FFileName : string;
- FDelete : boolean;
- protected
- public
- constructor Create(const aPath : string; aMode : word);
- destructor Destroy; override;
-
- property DeleteOnDestroy : boolean read FDelete write FDelete;
- property FileName : string read FFileName;
- end;
- {--------}
- constructor TaaTempFileStream.Create(const aPath : string; aMode : word);
- var
- PathNameZ : array [0..MAX_PATH] of char;
- FileNameZ : array [0..MAX_PATH] of char;
- begin
- {get the path for temporary files}
- if (aPath = '') then
- GetTempPath(sizeof(PathNameZ), PathNameZ)
- else
- StrLCopy(PathNameZ, PChar(aPath), sizeof(PathNameZ));
- {create a temporary file}
- GetTempFileName(PathNameZ, 'AA', 0, FileNameZ);
- FFileName := FileNameZ;
- {this last step will have created the file, so open it}
- inherited Create(FileName, aMode);
- end;
- {--------}
- destructor TaaTempFileStream.Destroy;
- begin
- {close the file}
- inherited Destroy;
- {if we're asked to delete the file, do so}
- if DeleteOnDestroy then
- DeleteFile(FileName);
- end;
- {====================================================================}
-
-
- {===TaaSorter========================================================}
- constructor TaaSorter.Create;
- begin
- inherited Create;
- end;
- {--------}
- destructor TaaSorter.Destroy;
- begin
- Reset;
- RecordLength := 0;
- inherited Destroy;
- end;
- {--------}
- procedure TaaSorter.Add(var aRecord);
- begin
- {the add method can only be called if we're not in the middle of
- getting records}
- Assert((FState <> GettingState) and (FState <> GetWithMergeState) ,
- 'cannot add new records whilst in the process of getting them; call Reset first');
-
- {there's no point in adding records if we have no comparison method}
- Assert(Assigned(FCompare),
- 'Sorter has no comparison method');
-
- {if we've no buffer, allocate one}
- if (FBuffer = nil) then
- srGetBuffer;
-
- {check to see whether we've filled the buffer}
- if (FRecCount = FMaxRecCount) then begin
- {if this was the first time that we filled the buffer, create the
- merge files}
- if (FState = AddingState) then
- srCreateMergeFiles;
- {sort then copy this bufferful of records to the correct Fx file}
- srQuicksortBuffer;
- TFileStream(FDestFile).WriteBuffer(FBuffer^, FRecLen * FRecCount);
- {change the destination file for the next one}
- if (FDestFile = FF1) then
- FDestFile := FF2
- else
- FDestFile := FF1;
- {reset the buffer}
- FRecCount := 0;
- {make sure the state is correct}
- FState := AddWithMergeState;
- end;
-
- {add this record to the buffer}
- Move(aRecord, FBuffer[FRecLen * FRecCount], FRecLen);
- inc(FRecCount);
-
- {make sure the state is correct}
- if ((FState = WaitingState) or (FState = FinishedState)) then
- FState := AddingState;
- end;
- {--------}
- function TaaSorter.Get(var aRecord) : boolean;
- var
- BytesRead : integer;
- begin
- {the get method can only be called if the sorter is not waiting for
- records to be added}
- Assert((FState <> WaitingState),
- 'cannot get new records if no records have yet been added');
-
- {get rid of the simple case}
- if (FState = FinishedState) then begin
- Result := false;
- Exit;
- end;
-
- {if the state is "adding records" then we need to quicksort the
- buffer and change the state to "getting records"}
- if (FState = AddingState) then begin
- srQuicksortBuffer;
- FCurRec := 0;
- FState := GettingState;
- end;
-
- {if the state is "adding records using mergefile" then we need to
- write out the final buffer to the correct destination file, and
- merge the files. The state gets changed to "getting records with
- merge"}
- if (FState = AddWithMergeState) then begin
- srQuicksortBuffer;
- TFileStream(FDestFile).WriteBuffer(FBuffer^, FRecLen * FRecCount);
- srMergeFiles;
- FCurRec := 0;
- FRecCount := 0;
- FState := GetWithMergeState;
- end;
-
- Assert((FState = GettingState) or (FState = GetWithMergeState),
- 'The sorter state is incorrect half way through the Get method');
-
- {if the state is "getting records" return the next one in the
- buffer; if there is none, return false}
- if (FState = GettingState) then begin
- if (FCurRec = FRecCount) then begin
- Result := false;
- FState := FinishedState;
- end
- else begin
- Move(FBuffer[FCurRec * FRecLen], aRecord, FRecLen);
- inc(FCurRec);
- Result := true;
- end;
- end
-
- {if the state is "getting records with merge" return the next one in
- the buffer; if there is none, try and read another buffer full from
- the final merge file; if there's still none, we're finished}
- else begin
- if (FCurRec = FRecCount) then begin
- BytesRead := TFileStream(FSrcFile).Read(FBuffer^,
- FMaxRecCount * FRecLen);
- {if there's nothing left in the final merge file, we're done}
- if (BytesRead = 0) then begin
- Result := false;
- FState := FinishedState;
- Exit;
- end;
- {calculate the number of records in this final buffer}
- FRecCount := BytesRead div FRecLen;
- FCurRec := 0;
- end;
- {copy the current record over}
- Move(FBuffer[FCurRec * FRecLen], aRecord, FRecLen);
- inc(FCurRec);
- Result := true;
- end;
- end;
- {--------}
- procedure TaaSorter.Reset;
- begin
- {if we have merge files, close and delete them}
- FF1.Free;
- FF2.Free;
- FG1.Free;
- FG2.Free;
- FF1 := nil;
- FF2 := nil;
- FG1 := nil;
- FG2 := nil;
-
- {reset the object to the "waiting for records" state}
- FRecCount := 0;
- FState := WaitingState;
- end;
- {--------}
- procedure TaaSorter.srCreateMergeFiles;
- begin
- Assert((FF1=nil) and (FF2=nil) and (FG1=nil) and (FG2=nil),
- 'CreateMergeFiles has been called with the mergefiles already created');
- FF1 := TaaTempFileStream.Create('', fmOpenReadWrite);
- TaaTempFileStream(FF1).DeleteOnDestroy := true;
- FF2 := TaaTempFileStream.Create('', fmOpenReadWrite);
- TaaTempFileStream(FF2).DeleteOnDestroy := true;
- FG1 := TaaTempFileStream.Create('', fmOpenReadWrite);
- TaaTempFileStream(FG1).DeleteOnDestroy := true;
- FG2 := TaaTempFileStream.Create('', fmOpenReadWrite);
- TaaTempFileStream(FG2).DeleteOnDestroy := true;
- FDestFile := FF1;
- end;
- {--------}
- procedure TaaSorter.srGetBuffer;
- var
- TestSize : Int64;
- Size : integer;
- begin
- Assert(FBuffer = nil,
- 'GetBuffer was called with the buffer already allocated');
-
- {to avoid problems we'll check that the record length multiplied by
- the max count does not exceed 10MB (an arbitrary value); it it is
- we'll invisibly change the max record count}
- TestSize := Int64(FMaxRecCount) * FRecLen;
- if (TestSize <= 10 * 1024 * 1024) then
- Size := TestSize
- else begin
- FMaxRecCount := (10 * 1024 * 1024) div FRecLen;
- Size := FMaxRecCount * FRecLen;
- end;
-
- {allocate the memory}
- GetMem(FBuffer, Size);
- FRecCount := 0;
- end;
- {--------}
- procedure TaaSorter.srMergeFiles;
- begin
- if MergeSortFixed(TFileStream(FF1), TFileStream(FF2),
- TFileStream(FG1), TFileStream(FG2),
- FRecLen, FMaxRecCount, FCompare) then
- FSrcFile := FG1
- else
- FSrcFile := FF1;
- TFileStream(FSrcFile).Seek(0, soFromBeginning);
- end;
- {--------}
- procedure TaaSorter.srQuickSortBuffer;
- begin
- Assert(FRecCount <> 0,
- 'calling quicksort with an empty buffer');
- QuickSort(FBuffer, FRecLen, FRecCount, FCompare);
- end;
- {--------}
- procedure TaaSorter.srSetCompare(aValue : TaaMergeCompare);
- begin
- {the compare function can only be set if we're not in the middle of
- adding or getting records}
- Assert((FState = WaitingState) or (FState = FinishedState),
- 'can only change the comparison function when the sorter is empty');
-
- FCompare := aValue;
- end;
- {--------}
- procedure TaaSorter.srMaxRecCount(aValue : integer);
- begin
- {the max record count can only be set if we're not in the middle of
- adding or getting records}
- Assert((FState = WaitingState) or (FState = FinishedState),
- 'can only change the maximum record count when the sorter is empty');
-
- {only do something if the user is changing the value}
- if (aValue <> FMaxRecCount) then begin
-
- {if we have allocated the buffer, free it}
- if (FBuffer <> nil) then begin
- FreeMem(FBuffer);
- FBuffer := nil;
- end;
-
- {set the new value}
- FMaxRecCount := aValue;
- end;
- end;
- {--------}
- procedure TaaSorter.srSetRecLen(aValue : integer);
- begin
- {the record length can only be set if we're not in the middle of
- adding or getting records}
- Assert((FState = WaitingState) or (FState = FinishedState),
- 'can only change the record length when the sorter is empty');
-
- {only do something if the user is changing the value}
- if (aValue <> FRecLen) then begin
-
- {if we have allocated the buffer, free it}
- if (FBuffer <> nil) then begin
- FreeMem(FBuffer);
- FBuffer := nil;
- end;
-
- {set the new value}
- FRecLen := aValue;
- end;
- end;
- {====================================================================}
-
- end.
-